Figures for Marta’s paper.
library(rhdf5)
library(dplyr)
library(Seurat)
library(scales)
library(ggplot2)
suppressPackageStartupMessages(library(ComplexHeatmap))
plot_ps <- function(adata, by='dpt_pseudotime') {
f_df <- as.data.frame(adata@reductions$umap@cell.embeddings)
f_df$color <- adata@meta.data[[by]]
f_plot <- ggplot(f_df, aes(x=UMAP_1, y=UMAP_2, color=color)) +
geom_point() + scale_color_continuous(type = "viridis")
# scale_color_gradient(low="blue", high="yellow")
return (f_plot)
}
counts <- as.data.frame(h5read('../data/processed/04_dataset.h5ad', 'raw/X'))
colnames(counts) <- h5read('../data/processed/04_dataset.h5ad', '/obs/Well_ID')
rownames(counts) <- h5read('../data/processed/04_dataset.h5ad', '/var/_index')
metadata <- read.csv('../data/processed/04_dataset-metadata.csv')
rownames(metadata) <- metadata$Well_ID
subset <- CreateSeuratObject(counts, meta.data = metadata)
subset@assays$RNA@scale.data <- h5read('../data/processed/04_dataset.h5ad', '/X')
# colnames(subset@assays$RNA@scale.data) <- h5read('../data/processed/04_dataset.h5ad', '/obs/Well_ID')
# rownames(subset@assays$RNA@scale.data) <- h5read('../data/processed/04_dataset.h5ad', '/var/_index')
subset@assays$RNA@scale.data <- as.matrix(counts)
subset@meta.data$louvain <- as.factor(subset@meta.data$louvain)
umap_coor <- t(h5read('../data/processed/04_dataset.h5ad', '/obsm/X_umap'))
rownames(umap_coor) <- h5read('../data/processed/04_dataset.h5ad', '/obs/Well_ID')
colnames(umap_coor) <- c('UMAP_1', 'UMAP_2')
subset@reductions$umap <- Seurat::CreateDimReducObject(
embeddings = umap_coor,
assay = 'RNA',
key = 'UMAP_'
)
cc <- readRDS('../data/mouse_cell_cycle_genes.rds')
stage_colors <- c('2iLIF_PrE' = '#8E9090',
# 'D0_PrE' = '#97918C',
'D1' = '#11839A',
'D2' = '#8E8E0D',
'D3' = '#E4B32E',
'D4' = '#C4651C',
'D7' = '#BD4306'
)
seurat_colors <- c('0' = '#0DA257',
'1' = '#A6856D',
'2' = '#0096BF',
'3' = '#C28153',
'4' = '#A6363A',
'5' = '#4F9996',
'6' = '#8C8177',
'7' = '#E89E85',
'8' = '#B6B44E'
)
cc_colors <- c('#FAA198', '#BDB76B', '#FFE4C4')
subset <- CellCycleScoring(subset, s.features = cc$s.genes, g2m.features = cc$g2m.genes)
DimPlot(subset, group.by = 'louvain', cols = as.vector(seurat_colors))
DimPlot(subset, group.by = 'Stage', cols = as.vector(stage_colors))
subset@meta.data %>%
group_by(Stage) %>%
summarize('counts' = n())
# Undiff
subset@meta.data %>%
filter(louvain %in% c('0', '2')) %>%
group_by(Stage) %>%
summarize('counts' = n())
# Diff
subset@meta.data %>%
filter(louvain %in% c('3', '4', '7')) %>%
group_by(Stage) %>%
summarize('counts' = n())
Estimating mean and median from 2iLIF. Only
take into account 2iLIF which were loaded with other Stages. Plate 1
which is only 2iLIF is ignored.
full_meta <- read.csv('../data/processed/04_dataset-metadata.csv')
tmp <- full_meta[(full_meta$plate_ID != '1' & full_meta$Stage == '2iLIF_PrE'), 'P3.FSC.A.Geo.Mean']
cell_mean <- mean(tmp, na.rm=T)
cell_median <- median(tmp, na.rm=T)
subset@meta.data$cs_mean <- subset$P3.FSC.A.Geo.Mean / cell_mean
subset@meta.data$cs_median <- subset$P3.FSC.A.Geo.Mean / cell_median
subset@meta.data['Status'] <- c()
subset@meta.data[subset@meta.data$louvain %in% c('1', '6', '5'), 'Status'] <- '2iLIF'
subset@meta.data[subset@meta.data$louvain %in% c('3', '4', '7'), 'Status'] <- 'PrEDiff'
subset@meta.data[subset@meta.data$louvain %in% c('0', '2'), 'Status'] <- 'NEDiff'
DimPlot(subset, group.by = 'Stage', cols = stage_colors) + theme_classic(base_size = 12)
DimPlot(subset, group.by = 'louvain', cols = seurat_colors) + theme_classic(base_size = 12)
DimPlot(subset, group.by = 'Phase', cols = cc_colors) + theme_classic(base_size = 12)
plot_ps(subset) + theme_classic(base_size = 12)
tmp <- subset
Idents(tmp) <- tmp$louvain
commonR::plot_proportion_bar(tmp, group.by = 'Stage', order = c('1', '6', '5', '0', '2', '3', '7', '4')) +
ggtitle('Stage proportion per per cluster') +
scale_fill_manual(values = stage_colors) +
theme_classic(base_size = 12)
p1 <- commonR::plot_proportion_bar(tmp[, tmp$Status == '2iLIF'], group.by = 'Stage') +
facet_grid(cols = vars('2iLIF')) +
scale_fill_manual(values = stage_colors) +
theme(legend.position="none")
p2 <- commonR::plot_proportion_bar(tmp[, tmp$Status == 'NEDiff'], group.by = 'Stage') +
facet_grid(cols = vars('NEDiff')) +
scale_fill_manual(values = stage_colors) +
theme(legend.position="none")
p3 <- commonR::plot_proportion_bar(tmp[, tmp$Status == 'PrEDiff'], group.by = 'Stage',
order = c('3', '7', '4')) +
facet_grid(cols = vars('PrEDiff')) +
scale_fill_manual(values = as.vector(stage_colors), breaks=names(stage_colors), drop=F)
p1 + p2 + p3
Heatmap of endoderm markers markers visualized in D0, D2 and D7. Should be progressive
genes <- c("Leo1", "Rtf1", "Tnrc6c", "Fn1", "Nanog", "Sox2", "Nodal", "Cdc73", "Ctr9",
"Ctnnb1", "Itgav", "Paf1", "Pou5f1", "Smad2", "Dusp5", "Hnf1b", "Sox7", "Eomes", "Dusp1", "Hsbp1", "Itga5",
"Gata4", "Setd2", "Sox17", "Dkk1", "Dusp4", "Gata6")
tmp <- subset[, subset$louvain %in% c('1', '3', '7', '4') & subset$Stage %in% c('2iLIF_PrE', 'D2', 'D7')]
# DoHeatmap(tmp, features = genes, group.by = 'Stage', angle = 15, group.colors = stage_colors) +
DoHeatmap(tmp, features = genes, group.by = 'Stage', angle = 15, group.colors = as.vector(stage_colors[c('2iLIF_PrE', 'D2', 'D7')])) +
scale_fill_gradientn(colors = c("blue", "white", "red"))
# dittoHeatmap(tmp, genes, order.by = "Stage", annot.by = c("Stage"))
mat <- GetAssayData(tmp[genes, ]) %>% as.matrix()
cluster_anno <- tmp[genes, ]$Stage
col_fun = circlize::colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
Heatmap(mat,
name = 'Expression',
column_split = factor(cluster_anno),
show_column_names = F,
show_column_dend = F,
show_row_dend = F,
cluster_rows = T,
cluster_columns = F,
col = col_fun,
top_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(
fill = c('#948170', '#379F4C', '#2096B3', '#F2A082', '#CE7E41', '#B22222')))
)
)
DoHeatmap(subset, features = genes, group.by = 'louvain', angle = 15, group.colors = seurat_colors) +
scale_fill_gradientn(colors = c("blue", "white", "red"))
mat <- GetAssayData(subset[genes, ]) %>% as.matrix()
cluster_anno <- factor(subset[genes, ]$louvain, levels = c('1', '6', '5', '0', '2', '3', '7', '4'))
col_fun = circlize::colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
# Heatmap(mat,
# name = 'Expression',
# column_split = factor(cluster_anno),
# show_column_names = F,
# show_column_dend = F,
# show_row_dend = F,
# cluster_rows = T,
# cluster_columns = F,
# col = col_fun,
# top_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(
# fill = c('#948170', '#379F4C', '#2096B3', '#F2A082', '#CE7E41', '#B22222')))
# )
# )
Bar plot of cell cycle from 2i to D7.
tmp <- subset[, subset$louvain %in% c('1', '6', '5', '3', '7', '4')]
Idents(tmp) <- tmp$Stage
commonR::plot_proportion_bar(tmp, group.by = 'Phase') +
scale_fill_manual(values=cc_colors) +
theme_classic(base_size = 12) +
ggtitle('Cell cycle progression in PrEDiff trajectory')
nowotschin <- readRDS("../data/processed/nowotschin.rds")
nowotschin[['Celltype_combined']] <- paste(nowotschin$Timepoint, nowotschin$CellType, sep = '-')
nowotschin <- nowotschin[, nowotschin$CellType %in% c('ICM', 'EPI', 'PrE', 'VE')]
Idents(nowotschin) <- nowotschin$Celltype_combined
commonR::plot_proportion_bar(nowotschin, group.by = 'Phase') +
scale_fill_manual(values=cc_colors) +
theme_classic(base_size = 12) +
ggtitle('Cell cycle progression trajectory')
nowotschin_sub <- nowotschin[, nowotschin$Celltype_combined %in% c(
"E3.5-ICM",
"E3.5-PrE",
"E4.5-PrE",
"E5.5-VE"
)]
commonR::plot_proportion_bar(nowotschin_sub, group.by = 'Phase') +
scale_fill_manual(values=cc_colors) +
theme_classic(base_size = 12) +
ggtitle('Cell cycle progression trajectory')
plot_ps(subset, by='P3.mCherry.561D.A.Geo.Mean') +
theme_classic(base_size = 12) +
ggtitle('FACS Hhex expression')
df <- subset@meta.data %>% filter(louvain %in% c('1', '6', '5'))
p1 <- ggplot(df, aes(x=louvain, y=P3.mCherry.561D.A.Geo.Mean, fill=louvain)) +
geom_boxplot() +
geom_jitter(width=0.1,alpha=0.2) +
labs(fill = 'Clusters') + xlab("Clusters") + ylab("FCS Hhex expression") +
coord_cartesian(ylim = c(0, 4000)) +
scale_fill_manual("legend", values = seurat_colors) +
theme(legend.position="none") +
facet_grid(cols = vars('2iLIF_PrE'))
df <- subset@meta.data %>% filter(louvain %in% c('0', '2'))
df$louvain <- factor(df$louvain, levels = c('0', '2'))
p2 <- ggplot(df, aes(x=louvain, y=P3.mCherry.561D.A.Geo.Mean, fill=louvain)) +
geom_boxplot() +
geom_jitter(width=0.1,alpha=0.2) +
labs(fill = 'Clusters') + xlab("Clusters") + ylab("FCS Hhex expression") +
coord_cartesian(ylim = c(0, 4000)) +
scale_fill_manual("legend", values = seurat_colors) +
theme(legend.position="none") +
facet_grid(cols = vars('NEDiff'))
df <- subset@meta.data %>% filter(louvain %in% c('3', '7', '4'))
df$louvain <- factor(df$louvain, levels = c('3', '7', '4'))
p3 <- ggplot(df, aes(x=louvain, y=P3.mCherry.561D.A.Geo.Mean, fill=louvain)) +
geom_boxplot() +
geom_jitter(width=0.1,alpha=0.2) +
labs(fill = 'Clusters') + xlab("Clusters") + ylab("FCS Hhex expression") +
coord_cartesian(ylim = c(0, 4000)) +
# scale_fill_manual("legend", values = seurat_colors) +
theme(legend.position="none") +
facet_grid(cols = vars('PrEDiff'))
p1 + p2 + p3
df <- subset@meta.data
df <- df[df$louvain %in% c('1', '6', '5', '0', '2', '3', '7', '4'), ]
df$louvain <- factor(df$louvain, levels = c('1', '6', '5', '0', '2', '3', '7', '4'))
ggplot(df, aes(x=louvain, y=P3.mCherry.561D.A.Geo.Mean, fill=louvain)) +
geom_boxplot() +
geom_jitter(width=0.1,alpha=0.2) +
scale_fill_manual("legend", values = seurat_colors) +
labs(fill = 'Clusters') + xlab("Clusters") + ylab("FCS Hhex expression") +
theme_classic(base_size = 12)
plot_ps(subset, by='P3.GFP.A.Geo.Mean') +
theme_classic(base_size = 12) +
ggtitle('FACS Sox2 expression')
df <- subset@meta.data %>% filter(louvain %in% c('1', '6', '5'))
p1 <- ggplot(df, aes(x=louvain, y=P3.GFP.A.Geo.Mean, fill=louvain)) +
geom_boxplot() +
geom_jitter(width=0.1,alpha=0.2) +
labs(fill = 'Clusters') + xlab("Clusters") + ylab("FCS Sox2 expression") +
coord_cartesian(ylim = c(0, 4000)) +
scale_fill_manual("legend", values = seurat_colors) +
theme(legend.position="none") +
facet_grid(cols = vars('2iLIF_PrE'))
df <- subset@meta.data %>% filter(louvain %in% c('0', '2'))
df$louvain <- factor(df$louvain, levels = c('0', '2'))
p2 <- ggplot(df, aes(x=louvain, y=P3.GFP.A.Geo.Mean, fill=louvain)) +
geom_boxplot() +
geom_jitter(width=0.1,alpha=0.2) +
labs(fill = 'Clusters') + xlab("Clusters") + ylab("FCS Sox2 expression") +
coord_cartesian(ylim = c(0, 4000)) +
scale_fill_manual("legend", values = seurat_colors) +
theme(legend.position="none") +
facet_grid(cols = vars('NEDiff'))
df <- subset@meta.data %>% filter(louvain %in% c('3', '7', '4'))
df$louvain <- factor(df$louvain, levels = c('3', '7', '4'))
p3 <- ggplot(df, aes(x=louvain, y=P3.GFP.A.Geo.Mean, fill=louvain)) +
geom_boxplot() +
geom_jitter(width=0.1,alpha=0.2) +
labs(fill = 'Clusters') + xlab("Clusters") + ylab("FCS Sox2 expression") +
coord_cartesian(ylim = c(0, 4000)) +
# scale_fill_manual("legend", values = seurat_colors) +
theme(legend.position="none") +
facet_grid(cols = vars('PrEDiff'))
p1 + p2 + p3
Problem: D0 in from pseudotime is later
then D1. I can redo the pseudotime by removing
2iLIF and setting D0 as a starting point.
Otherwise I can start just ignore the D0.
df <- subset@meta.data %>%
filter(louvain %in% c('1', '6', '5', '3', '7', '4')) %>%
dplyr::select(louvain, cs_mean, cs_median, dpt_pseudotime, Stage) %>%
dplyr::filter(!is.na(cs_mean)) %>%
dplyr::arrange(dpt_pseudotime)
ggplot() +
geom_point(data=df, aes(x=dpt_pseudotime, y=cs_mean, color=Stage)) +
geom_smooth(data=df, aes(x=dpt_pseudotime, y=cs_mean)) +
ggtitle('Mean cell size normalized by 2iLIF') +
scale_color_manual("legend", values = stage_colors) +
ylim(0, 2)
ggplot() +
geom_point(data=df, aes(x=dpt_pseudotime, y=cs_median, color=Stage)) +
geom_smooth(data=df, aes(x=dpt_pseudotime, y=cs_median)) +
scale_color_manual("legend", values = stage_colors) +
ggtitle('Median cell size normalized by 2iLIF') +
ylim(0, 2)
Figures can be found in ../reports/
Idents(subset) <- subset$louvain
vitro_markers <- FindAllMarkers(subset, only.pos = T)
vitro_markers_filt <- vitro_markers %>%
group_by(cluster) %>%
filter(p_val_adj < 0.05 & avg_log2FC > 0.5) %>%
slice_max(n = 20, order_by = avg_log2FC)
vitro_markers_filt
Source code stolen from Jose with his blessing.
library(GeneOverlap)
library(reshape2)
library(ggplot2)
marker_database <- read.delim('../data/marker_database.tsv', stringsAsFactors=FALSE)
marker_database$genes <- strsplit(tolower(marker_database$genes), split = ",")
database_lists <- marker_database$genes
names(database_lists) <- paste(marker_database$publication, "|", marker_database$cluster)
database_lists <- database_lists[grepl("Nowotschin", names(database_lists))]
data(GeneOverlap)
genomic_size <- gs.RNASeq
my_list <- vitro_markers_filt[c("cluster", "gene")]
my_list <- sapply(as.vector(unique(my_list$cluster)), FUN = function(x, gene_list){
return(my_list %>% filter(cluster == x) %>% pull(gene) %>% tolower)
}, my_list, USE.NAMES = TRUE, simplify = FALSE)
# run GOM
gom.obj <- newGOM(gsetA = my_list, gsetB = database_lists, genome.size = genomic_size)
# extract data
pvals <- getMatrix(gom.obj, name = "pval")
odds <- getMatrix(gom.obj, name = "odds.ratio")
log_odds <- log2(odds)
# heatmap based on p-values
melt_pvals <- melt(pvals)
melt_pvals$value <- p.adjust(melt_pvals$value, method = "bonferroni")
colnames(melt_pvals) <- c("Louvain", "Database_lists", "value")
melt_pvals$Study <- unlist(lapply(strsplit(as.character(melt_pvals$Database_lists), split = "|", fixed = T), "[", 1))
melt_pvals$Study <- trimws(melt_pvals$Study)
melt_pvals$cell_type <- unlist(lapply(strsplit(as.character(melt_pvals$Database_lists), split = "|", fixed = T), "[", 2))
melt_pvals$cell_type <- trimws(melt_pvals$cell_type)
melt_pvals <- melt_pvals[melt_pvals$Louvain %in% c('1', '6', '5', '0', '2', '3', '7', '4'), ]
melt_pvals$Louvain <- factor(melt_pvals$Louvain, levels = c('1', '6', '5', '0', '2', '3', '7', '4'))
ggplot(melt_pvals) +
geom_tile(aes(x = Louvain, y = cell_type, fill = -log10(value)), color = "white") +
#coord_equal() + # Makes the heatmap tiles square, not compatible with facet_grid
labs(x = "Louvain", y = "Database gene lists", fill = "-log10(p-value)") +
scale_fill_distiller(palette = "Spectral") +
facet_grid(Study~., scales = "free", space = "free_y") +
theme(axis.text.x = element_text(angle = 90), strip.text.y = element_text(angle = 0))
melt_logodds <- melt(log_odds)
melt_logodds <- melt_logodds[melt_logodds$Var1 %in% c('1', '6', '5', '0', '2', '3', '7', '4'), ]
colnames(melt_logodds) <- c("PC_lists","Database_lists","value")
melt_logodds$Study <- unlist(lapply(strsplit(as.character(melt_logodds$Database_lists), split = "|", fixed = T), "[", 1))
melt_logodds$Study <- trimws(melt_logodds$Study)
melt_logodds$cell_type <- unlist(lapply(strsplit(as.character(melt_logodds$Database_lists), split = "|", fixed = T), "[", 2))
melt_logodds$cell_type <- trimws(melt_logodds$cell_type)
melt_pvals$odds <- melt_logodds$value
melt_pvals$odds[!is.finite(melt_pvals$odds)] <- NA
limit <- max(abs(melt_pvals$odds), na.rm = T) * c(-1, 1)
ggplot(melt_pvals) + geom_point(aes(x = factor(Louvain), y = factor(cell_type), color = odds, size = -log10(value) )) +
labs(x = "Louvain", y = "Database gene lists", color = "log2(odds ratio)", size = "-log10(p-value)") +
scale_color_distiller(palette = "RdBu", direction = -1, limit = limit) + scale_size_continuous(range = c(1,5)) +
facet_grid(Study~., scales = "free", space = "free", drop = T) + theme_bw()
genes <- c("Klf2", "Klf4", "Klf5", "Nanog", "Sox2", "Pou5f1", "Otx2", "Pou3f1",
"Cdx2", "Eomes", "Fgf5", "Elf5", "Zfp42")
for(gene in genes) {
if (gene %in% rownames(subset)) {
p <- FeaturePlot(subset, features= gene, pt.size = 1, cols = c("#FFE4FD","magenta4"))
print(p)
}
}